SenseMyFEUP data
Data loading
Data is filtered by travelmode (car and bus) and date (April 2016).
Intersession times
Visualizing intersession times
All intersession time Porto April16
Intersession time along the week
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
Histogram intersession time.
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
Number of sessions per way_id
Filtering by sessions per way_id
Edges with >50 sessions.
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
Top 10% points
Map points April 2016
Showing maps
Traffic Map April Porto 2016 all
Traffic Map April 2016 all >50
Traffic day Map April 2016
Traffic night Map April 2016
Top 12 edges

Map Intersession time (<1.3h) day.

Map Intersession time (<2h)night.

–>
#Sessions by date >50
Day of the week

By day

By hour

Speeds >50
df_hotedges_april16pt %>%
group_by(hour =hour(time)) %>%
summarise(avg_speed = mean((speed*18)/5)) %>%
ggplot(aes(hour, avg_speed)) +
geom_line() +
scale_x_continuous(breaks = seq(0,23,1))+
ylab("Avg speed km/h")

Speed by week

Speed by day
Speed by hour
Number of session per hour
Ways_ids per half hour
prueba <- table(cut(filter(df_hotedges_april16pt, day(time) < 7)$time, breaks = "30 mins"))
plot(prueba, xlab = "date", ylab = "frequency")
Time serie per way_id
filter(df_hotedges_april16pt, way_id == 37141967 ) %>%
group_by(day(time)) %>%
ggplot(aes(x= time, y= (speed*18)/5)) +
geom_line()
---
title: 'Sense My FEUP - April 2016 Data'
author: "Daniela S. Gil"
output: html_notebook
---

```{r echo = FALSE, eval=FALSE}
# Write the current workspace image to disk, then load that same file back.
save.image(file = "SensemyWorkSpace.RData")
# load(file = "df_first_session_cars_april16.Rda")
load(file = "SensemyWorkSpace.RData")

```


# SenseMyFEUP data
## Data loading
 Data is filtered by travelmode (car and bus) and date (April 2016).      

```{r echo=FALSE}
# Keep an unfiltered copy of the speed observations, then restrict them to a
# latitude/longitude rectangle around Porto (all bounds are exclusive).
df_edges_april16 <- df_speed

df_edges_april16pt <- filter(
  df_speed,
  lat > 41.1364726546 & lat < 41.1859352808155,
  lon > -8.6912940681405 & lon < -8.55396934228
)
```

## Intersession times

### Visualizing intersession times
```{r echo=FALSE, eval=FALSE}

# First observation (smallest `seconds`) of every session on every way.
# summarise() drops the last grouping level, so the result stays grouped by
# way_id; the mutate() further down relies on that grouping.
df_intersession_april16pt <- summarise(
  group_by(df_edges_april16pt, way_id, session_id),
  seconds = min(seconds)
)

# Convert epoch seconds to POSIXct timestamps on both tables.
df_intersession_april16pt[["time"]] <- as.POSIXct(
  df_intersession_april16pt[["seconds"]],
  origin = "1970-01-01"
)
df_edges_april16pt[["time"]] <- as.POSIXct(
  df_edges_april16pt[["seconds"]],
  origin = "1970-01-01"
)

# Intersession time: minutes between consecutive session starts on the same
# way; the first session of each way gets 0.
df_intersession_april16pt <- mutate(
  arrange(df_intersession_april16pt, desc(way_id), time),
  intersession_time = c(0, as.numeric(diff(time), units = "mins"))
)

# Remove Min column
# df_intersession_april$min <- NULL
```

All intersession time Porto April16 
```{r echo=FALSE}
# Two box plots side by side: every intersession time, and only the strictly
# positive ones (ways that saw at least a second session). The y axis is
# limited to 0-5000 minutes.
layout(
  add_boxplot(
    plot_ly(
      y = df_intersession_april16pt$intersession_time,
      type = "box",
      name = "All way_ids"
    ),
    y = filter(df_intersession_april16pt, intersession_time > 0)$intersession_time,
    name = "At least 2 sessions"
  ),
  title = "Intersession time (mins)",
  yaxis = list(range = c(0, 5000))
)

```

Intersession time along the week 
```{r echo=FALSE}
# Box plot of intersession time per weekday name, then converted to plotly.
# The x-axis order is fixed by the Portuguese day names given as limits, so
# weekdays() must return exactly these labels for the boxes to be drawn.
p <- df_intersession_april16pt %>%
  ggplot(aes(
    x = weekdays(time),
    y = intersession_time,
    fill = interaction(weekdays(time))
  )) +
  geom_boxplot() +
  labs(
    x = "Days of the week",
    y = "Intersession time (mins)",
    title = "Intersession time(min) in the week "
  ) +
  theme(legend.position = "none") +
  scale_x_discrete(
    limits = c("Domingo", "Segunda", "Terça", "Quarta", "Quinta", "Sexta", "Sábado")
  ) +
  coord_cartesian(ylim = c(0, 10000))

p <- plotly_build(p)
p

```

Histogram intersession time.
```{r echo=FALSE}

# Histogram of positive intersession times in hours (minutes / 60):
# 9-hour bins, viewport 0-300 h.
int2 <- ggplotly(
  ggplot(
    filter(df_intersession_april16pt, intersession_time > 0),
    aes(intersession_time / 60)
  ) +
    geom_histogram(binwidth = 9) +
    ggtitle("Intersession time (hours)") +
    coord_cartesian(xlim = c(0, 300))
)

# Same data zoomed in: 1-hour bins, viewport 0-25 h.
int3 <- ggplotly(
  ggplot(
    filter(df_intersession_april16pt, intersession_time > 0),
    aes(intersession_time / 60)
  ) +
    geom_histogram(binwidth = 1) +
    coord_cartesian(xlim = c(0, 25))
)

# Stack both interactive histograms vertically.
subplot(int2, int3, nrows = 2)
```

Number of sessions per way_id

```{r echo=FALSE,  message=FALSE}
# Distribution of the number of sessions recorded per way_id, with a dashed
# marker at the 50-session threshold.
int1 <- ggplot(
  summarise(group_by(df_intersession_april16pt, way_id), n_sessions = n()),
  aes(x = n_sessions)
) +
  geom_histogram(binwidth = 10) +
  coord_cartesian(xlim = c(0, 150)) +
  ggtitle("Number of sessions per way_id in Porto") +
  geom_vline(
    xintercept = 50,
    size = 1,
    colour = "#FF3721",
    linetype = "dashed"
  )

ggplotly(int1)
```

### Filtering by sessions per way_id 

```{r echo=FALSE}
# "Hot" edges: way_ids with at least 50 sessions.
# "Super hot" edges: way_ids with at least 200 sessions.

# One row per way_id with its session count, keeping only the hot ones.
df_id_hotedges_april16pt <- df_intersession_april16pt %>%
  group_by(way_id) %>%
  summarise(n_sessions = n()) %>%
  filter(n_sessions >= 50)

# All speed observations that belong to a hot edge.
df_hotedges_april16pt <- df_id_hotedges_april16pt %>%
  merge(y = df_edges_april16pt, by = "way_id")

# All speed observations that belong to a super hot edge. The 200-session
# threshold is applied to the id table *before* merging; previously the merge
# reused the >= 50 id table, so this data frame was identical to
# df_hotedges_april16pt and the ">200 sessions" plots showed the wrong subset.
df_superhotedges_april16pt <- df_id_hotedges_april16pt %>%
  filter(n_sessions >= 200) %>%
  merge(y = df_edges_april16pt, by = "way_id")

```

```{r echo=FALSE, eval=FALSE} 
# POINTS for mapping

# Ways with at least 50 sessions, joined to the per-edge point table.
df_points_hotedges_april16pt <- merge(
  filter(
    summarise(group_by(df_intersession_april16pt, way_id), n_sessions = n()),
    n_sessions >= 50
  ),
  y = df_points_edge_osm_april16pt,
  by = "way_id"
)

# Ways with at least 200 sessions, joined to the per-edge point table.
df_points_superhotedges_april16pt <- merge(
  filter(
    summarise(group_by(df_intersession_april16pt, way_id), n_sessions = n()),
    n_sessions >= 200
  ),
  y = df_points_edge_osm_april16pt,
  by = "way_id"
)

# Label each observation "day" (hours 7 to 20 inclusive) or "night".
df_superhotedges_april16pt <- df_superhotedges_april16pt %>%
  mutate(class = ifelse(hour(time) >= 7 & hour(time) <= 20, "day", "night"))

# Day: observations between 07:00 and 20:59 on ways with >= 50 sessions.
df_points_hotedges_april16pt_d <- merge(
  filter(
    df_hotedges_april16pt,
    hour(time) >= 7 & hour(time) <= 20 & n_sessions >= 50
  ),
  y = df_points_edge_osm_april16pt,
  by = "way_id"
)

# Night: observations outside 07:00-20:59 on ways with >= 20 sessions.
df_points_april16pt_night_n <- merge(
  filter(
    df_hotedges_april16pt,
    (hour(time) < 7 | hour(time) > 20) & n_sessions >= 20
  ),
  y = df_points_edge_osm_april16pt,
  by = "way_id"
)

```

```{r echo=FALSE}
# Tag every session start as "day" (hour 7 to 20 inclusive) or "night".
df_intersession_april16pt <- df_intersession_april16pt %>%
  mutate(
    class = ifelse(hour(time) >= 7 & hour(time) <= 20, "day", "night")
  )
  
```


### ECDF 
```{r echo=FALSE}
# ECDF of the positive intersession times on a linear x axis.
e1 <- df_intersession_april16pt %>%
  subset(intersession_time > 0) %>%
  ggplot(aes(intersession_time)) +
  stat_ecdf(geom = "step") +
  xlab("Intersession time(mins)")

# Same ECDF on a log10 x axis.
e2 <- df_intersession_april16pt %>%
  subset(intersession_time > 0) %>%
  ggplot(aes(intersession_time)) +
  stat_ecdf(geom = "step") +
  scale_x_log10() +
  xlab("Intersession time Log")

# Show both panels next to each other under a shared title.
grid.arrange(e1, e2, ncol = 2, top = "Intersession time all sessions Porto (mins)")
```
 
```{r echo=FALSE, eval=FALSE}

# Not used: uncoloured ECDFs of the average intersession time (avg_itm),
# one panel from the day table and one from the night table.
e3 <- df_hotedges_april16pt_day %>%
  ggplot(aes(avg_itm)) +
  stat_ecdf(geom = "step") +
  scale_x_log10(breaks = seq(0, 1000, 200)) +
  xlab(" Log Intersession time(min)")

e4 <- df_hotedges_april16pt_night %>%
  ggplot(aes(x = avg_itm)) +
  stat_ecdf(geom = "step") +
  scale_x_log10(breaks = seq(0, 1000, 200)) +
  xlab(" Log Intersession time(min)")

grid.arrange(e3, e4, ncol = 2, top = "ECDF Average Intersession time day(>50) ")
```

```{r echo=FALSE, eval=FALSE}
# Label each session start "day" (hour 7 to 20 inclusive) or "night" so the
# ECDF below can be coloured by class.
df_intersession_april16pt <- df_intersession_april16pt %>%
  mutate(
    class = ifelse(hour(time) >= 7 & hour(time) <= 20, "day", "night")
  )

# ECDF (log10 x axis) of positive intersession times, restricted to the
# way_ids present in df_hotedges_april16pt, one curve per class.
df_intersession_april16pt %>%
  filter(intersession_time > 0) %>%
  filter(way_id %in% df_hotedges_april16pt$way_id) %>%
  ggplot(aes(intersession_time, color = class)) +
  stat_ecdf(geom = "step") +
  scale_x_log10() +
  ggtitle("Intersession time >50 sessions Porto")
```


```{r echo=FALSE}
# ECDF (log10 x axis) of positive intersession times, restricted to the
# way_ids present in df_superhotedges_april16pt, one curve per class.
df_intersession_april16pt %>%
  filter(intersession_time > 0) %>%
  filter(way_id %in% df_superhotedges_april16pt$way_id) %>%
  ggplot(aes(intersession_time, color = class)) +
  stat_ecdf(geom = "step") +
  scale_x_log10() +
  ggtitle("Intersession time >200 sessions Porto")
```


```{r  echo = FALSE, eval = FALSE }

# Same as plotly before but separate. Ignore.

# ECDF all by #points
# (This heading used to be a bare text line inside the chunk, which is a
# syntax error as soon as the chunk is evaluated.)
# NOTE(review): all three panels read the same df_osm_edge; presumably it was
# rebuilt for all/day/night data between plots -- confirm.
ecdf_all <- ggplot(df_osm_edge, aes(points)) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  xlab("All points")

# ggtitle() is the ggplot2 way to set a title; base graphics title() cannot
# be added to a ggplot object.
ecdf_day <- ggplot(df_osm_edge, aes(points)) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  ggtitle("ECDF day") +
  xlab("Points at day")

ecdf_night <- ggplot(df_osm_edge, aes(points)) +
  scale_x_log10() +
  stat_ecdf(geom = "step") +
  ggtitle("ECDF night") +
  xlab("Points at night")

grid.arrange(ecdf_all, ecdf_day, ecdf_night, ncol = 3)

```


ECDF by #sessions 

```{r echo=FALSE}
# ECDF (log10 x axis) of the number of sessions recorded per way_id.
ggplot(
  summarise(group_by(df_intersession_april16pt, way_id), sessions = n()),
  aes(sessions)
) +
  stat_ecdf(geom = "step") +
  scale_x_log10() +
  ggtitle("ECDF number of sessions all Porto")
  
```

###  Edges with >50 sessions. 


```{r echo=FALSE, eval=TRUE}
# Histogram of the shortest intersession times on hot edges. The cut-off is
# the 35% quantile of intersession_time over the whole table, not just over
# the hot-edge subset.
low1 <- df_intersession_april16pt %>%
  subset(
    way_id %in% df_hotedges_april16pt$way_id &
      intersession_time < quantile(df_intersession_april16pt$intersession_time, 0.35)
  ) %>%
  ggplot(aes(intersession_time)) +
  geom_histogram(bins = 10) +
  scale_x_continuous(breaks = seq(0, 200, 10)) +
  labs(
    x = "Intersession time (mins)",
    title = "Lowest 35% intersession time >50"
  )

ggplotly(low1)
```


```{r echo=FALSE, eval=TRUE}
# Histogram of intersession times on hot edges, stacked by day/night class,
# with the x scale limited to 0-3000 minutes and the y scale to 0-5000.
c1 <- df_intersession_april16pt %>%
  filter(way_id %in% df_hotedges_april16pt$way_id) %>%
  ggplot(aes(intersession_time, fill = class)) +
  geom_histogram() +
  xlab("Intersession time (min)") +
  xlim(0, 3000) +
  ylim(0, 5000) +
  ggtitle("Intersession time >50 day/night")

ggplotly(c1)
```



Top 10% points
```{r echo = FALSE, eval=TRUE}

# Optional: quick look at the distribution of `points` per hot edge
# (five-number summary, 90% quantile, and a box plot).

summary(df_points_hotedges_april16pt[["points"]])
quantile(df_points_hotedges_april16pt[["points"]], 0.90)

boxplot(df_points_hotedges_april16pt[["points"]])


```


## Map points April 2016

```{r echo=FALSE, eval=FALSE}
# Necessary for mapping: builds the (way_id, points) table and the way_id
# vector consumed by the map-drawing loop.
# Each assignment below overwrites df_osm_edge, so when the whole chunk runs
# only the last one takes effect.
# NOTE(review): presumably the alternatives are run one at a time to choose
# which edge set gets mapped -- confirm.

# Option 1: edges with >= 50 sessions.
df_osm_edge <- df_points_hotedges_april16pt %>% 
  select(way_id, points)

# Option 2: edges with >= 200 sessions.
df_osm_edge <- df_points_superhotedges_april16pt %>% 
  select(way_id, points)

# Option 3: edges with >= 200 sessions that also appear in df_osm_edge_ids
# (that object is not created in this part of the notebook).
df_osm_edge <- df_points_superhotedges_april16pt %>% 
  subset(way_id %in% df_osm_edge_ids$way_id) %>% 
  select(way_id, points)


# way_id column (first column) to iterate over when drawing the map.
list_osm_edge <- df_osm_edge[, 1]

```

```{r echo=FALSE, eval=FALSE}
# Build a leaflet map of Porto and draw every way in list_osm_edge as a
# polyline whose colour depends on where its `points` value falls among the
# quantiles of df_osm_edge$points.

# Colour thresholds.
feup <- quantile(df_osm_edge$points, 0.90)
superior <- quantile(df_osm_edge$points, 0.7)
medio <- quantile(df_osm_edge$points, 0.5)
low <- 0

# Empty map centred on Porto with the default and CartoDB Positron tiles.
m <- leaflet() %>% setView(lng = -8.61419, lat = 41.16311, zoom = 13)
m <- addTiles(m)
m <- addProviderTiles(m, "CartoDB.Positron")

# Row of df_osm_edge that corresponds to the way_id being processed.
counter <- 1

for (way_id in list_osm_edge) {

  # Fetch the way geometry as WKT text, reprojected to EPSG:4326.
  df_way_id <- dbGetQuery(con_osm, paste0("SELECT st_astext(st_transform(way, 4326)) AS line FROM planet_osm_line WHERE planet_osm_line.osm_id = ", way_id))

  # Strip the text before "(" and the closing ")" so that only the
  # comma-separated "lon lat" pairs remain.
  line <- as.character(df_way_id$line)
  line <- unlist(strsplit(line, split = "(", fixed = TRUE))[2]
  line <- substr(line, 1, nchar(line) - 1)

  parsed_line <- strsplit(line, ",")

  lons <- c()
  lats <- c()

  # parsed_line is empty when the query returned no geometry for this way.
  if (length(parsed_line) != 0) {

    # Split every "lon lat" pair at once instead of growing the coordinate
    # vectors one element at a time.
    coords <- strsplit(parsed_line[[1]], split = " ", fixed = TRUE)
    lons <- as.numeric(vapply(coords, `[`, character(1), 1))
    lats <- as.numeric(vapply(coords, `[`, character(1), 2))

    # Deciding the color of the line from its number of points.
    n_points <- df_osm_edge[counter, 2]

    edge_color <- if (n_points > feup) {
      "blue"
    } else if (n_points >= superior) {
      "red"
    } else if (n_points >= medio) {
      "yellow"
    } else if (n_points >= low) {
      "green"
    } else {
      NULL
    }

    if (!is.null(edge_color)) {
      m <- addPolylines(m, lons, lats, color = edge_color, popup = paste0("", way_id))
    }
  }

  # Advance on EVERY iteration. The counter used to advance only when a
  # geometry was found, so a single way missing from the OSM table shifted
  # the colours of all the ways that followed it.
  counter <- counter + 1

  #content <- paste("w", way_id, sep = "")
  #m <- addPopups(m, lons, lats, content, options = popupOptions(closeButton = TRUE))
  #m <- addLabelOnlyMarkers(m, lons,lats, label = paste("", way_id, sep = ""),
  #                         labelOptions = labelOptions(noHide = F, textsize = "15px"))
  #print(line)

}

```
### Showing maps 

Traffic  Map April Porto 2016 all
```{r eval= TRUE}
# Showing Map.
# Copy whatever map is currently stored in `m` into m1 and display it.
# NOTE(review): presumably the map-building chunk is re-run on the matching
# data set right before this chunk -- confirm.
m1 <- m
m1
#mapshot(m, url = paste0(getwd(), "/map.html"))
```

Traffic  Map April 2016 all >50
```{r echo=FALSE}
# Copy the map currently stored in `m` into m_50 and display it.
# NOTE(review): presumably `m` was rebuilt from the >50-session edges before
# this chunk ran -- confirm.
m_50 <- m
m_50 
```

Traffic day Map April 2016
```{r echo=FALSE}
# Copy the map currently stored in `m` into m_50_day and display it.
# NOTE(review): presumably `m` was rebuilt from the daytime points before
# this chunk ran -- confirm.
m_50_day <- m
m_50_day
```

Traffic night Map April 2016
```{r echo=FALSE}
# Copy the map currently stored in `m` into m_20_night and display it.
# NOTE(review): presumably `m` was rebuilt from the night-time points before
# this chunk ran -- confirm.
m_20_night <- m
m_20_night
```

Top 12 edges 

```{r echo=FALSE}
# Copy the map currently stored in `m` into m_hot and display it.
# NOTE(review): presumably `m` was rebuilt from the top edges before this
# chunk ran -- confirm.
m_hot <- m
m_hot
```

<!-- Ignore these maps for the moment   
Map Intersession time all 1.30h.
```{r echo=FALSE}
# Copy the map currently stored in `m` into m_itm and display it.
# NOTE(review): presumably `m` was rebuilt from intersession-time data before
# this chunk ran -- confirm.
m_itm <- m
m_itm
```

Map Intersession time (<1.3h) day.
```{r echo=FALSE}
# Copy the map currently stored in `m` into m_itm_day and display it.
# NOTE(review): presumably `m` was rebuilt from daytime intersession data
# before this chunk ran -- confirm.
m_itm_day <- m
m_itm_day
#mapshot(m_night_semsessions, file = "~/maps/top_intersessions_night_april.png")
```
Map Intersession time (<2h)night.
```{r echo=FALSE}
# Copy the map currently stored in `m` into m_itm_night and display it.
# NOTE(review): presumably `m` was rebuilt from night-time intersession data
# before this chunk ran -- confirm.
m_itm_night <- m
m_itm_night
#mapshot(m, file = "~/maps/_all_april.png")
```
-->

## #Sessions by date >50

### Day of the week 
```{r echo= FALSE} 
# Number of (way_id, session) rows on hot edges per day of the week.
# wday() yields the numbers 1-7; the discrete x scale labels those positions
# with Portuguese day names starting on Sunday.
df_intersession_april16pt %>%
  filter(way_id %in% df_hotedges_april16pt$way_id) %>%
  group_by(weekday = wday(time)) %>%
  summarise(way_ids = n()) %>%
  ggplot(aes(weekday, way_ids)) +
  scale_x_discrete(
    limits = c("Domingo", "Segunda", "Terça", "Quarta", "Quinta", "Sexta", "Sábado")
  ) +
  geom_line()

```

### By day 
```{r echo= FALSE}
# Number of (way_id, session) rows on hot edges per day of the month.
df_intersession_april16pt %>%
  filter(way_id %in% df_hotedges_april16pt$way_id) %>%
  group_by(day = day(time)) %>%
  summarise(n = n()) %>%
  ggplot(aes(day, n)) +
  scale_x_continuous(breaks = seq(1, 30, 1)) +
  geom_line()

```

### By hour 
```{r echo= FALSE} 
# Number of (way_id, session) rows on hot edges per hour of the day.
df_intersession_april16pt %>%
  filter(way_id %in% df_hotedges_april16pt$way_id) %>%
  group_by(hour = hour(time)) %>%
  summarise(n = n()) %>%
  ggplot(aes(hour, n)) +
  scale_x_continuous(breaks = seq(0, 23, 1)) +
  geom_line()

```

## Speeds >50

```{r}
# Mean speed per way_id for the hot edges, with the number of observations.
# speed * 18 / 5 converts m/s to km/h. The input is df_speed (not the
# Porto-filtered table), restricted to way_ids found in the hot-edge table.
avg_speed_wayid <- summarise(
  group_by(
    subset(df_speed, way_id %in% df_hotedges_april16pt$way_id),
    way_id
  ),
  avg_speed = mean((speed * 18) / 5),
  n = n()
)

# Histogram of those per-way averages in 3 km/h bins.
avg_speed_wayid %>%
  ggplot(aes(avg_speed)) +
  geom_histogram(binwidth = 3) +
  scale_x_continuous(name = "Avg_speed(km/h)", breaks = seq(0, 150, 3))
```

Speed by week 
```{r echo=FALSE}
# Average speed (km/h) on hot edges per day of the week; wday() numbers 1-7
# are labelled with Portuguese day names starting on Sunday.
ggplot(
  summarise(
    group_by(df_hotedges_april16pt, weekday = wday(time)),
    avg_speed = mean((speed * 18) / 5)
  ),
  aes(weekday, avg_speed)
) +
  geom_line() +
  scale_x_discrete(
    limits = c("Domingo", "Segunda", "Terça", "Quarta", "Quinta", "Sexta", "Sábado")
  ) +
  ylab("Avg speed km/h")
```

Speed by day

```{r echo=FALSE}
# Average speed (km/h) on hot edges per day of the month.
ggplot(
  summarise(
    group_by(df_hotedges_april16pt, day = day(time)),
    avg_speed = mean((speed * 18) / 5)
  ),
  aes(day, avg_speed)
) +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 30, 1)) +
  ylab("Avg speed km/h")
```

Speed by hour
```{r echo=FALSE}
# Average speed (km/h) on hot edges per hour of the day.
ggplot(
  summarise(
    group_by(df_hotedges_april16pt, hour = hour(time)),
    avg_speed = mean((speed * 18) / 5)
  ),
  aes(hour, avg_speed)
) +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 23, 1)) +
  ylab("Avg speed km/h")
```

Number of session per hour
```{r echo=FALSE}
# Distinct sessions seen on hot edges per hour of the day, excluding
# wday() values 1 and 7 (Sunday and Saturday in the labelling used by the
# weekday plots).
ggplot(
  summarise(
    group_by(
      filter(df_hotedges_april16pt, wday(time) != 1 & wday(time) != 7),
      hour = hour(time)
    ),
    sessions = n_distinct(session_id)
  ),
  aes(hour, sessions)
) +
  geom_line() +
  scale_x_continuous(breaks = seq(0, 23, 1))
```

Ways_ids per half hour
```{r}
# Count hot-edge observations in 30-minute bins for the days of the month
# before the 7th, then plot the counts over time.
prueba <- table(
  cut(
    filter(df_hotedges_april16pt, day(time) < 7)[["time"]],
    breaks = "30 mins"
  )
)
plot(prueba, xlab = "date", ylab = "frequency")
```

## Time series per way_id 

```{r echo=FALSE}
# Interactive plot of speed (km/h) over time for the third way_id in
# list_osm_edge, coloured by the day/night class.
layout(
  plot_ly(
    filter(df_superhotedges_april16pt, way_id == list_osm_edge[3]),
    x = ~time,
    y = ~(speed*18)/5,
    color = ~class
  ),
  title = paste("Time series of speed for way_id", list_osm_edge[3]),
  yaxis = list(title = "Speed (km/h)"),
  xaxis = list(title = "Date")
)

```

```{r}
# Line plot of speed (km/h) over time for one specific way. The grouping by
# day of month is kept from the original, although the plot never maps it.
df_hotedges_april16pt %>%
  filter(way_id == 37141967) %>%
  group_by(day(time)) %>%
  ggplot(aes(x = time, y = (speed * 18) / 5)) +
  geom_line()
```




